% Lucia Wagner and Sara Clifton - September 2021
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This function fits the ACE (abstain-cigarette-ecig) model to adolescent data
%    - inputs: none
%    - outputs: plots time (in real years) versus ACE populations, plots
%    time (in real years) versus public health Ratio R, interpolates
%    maximum vaping prevalence and public health ratio
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [] = LW_youth_fit_ACEmodel()
% load data for smoking/vaping prevalence
load("YouthSmokingData.mat"); 
% store data as a matrix ('NaN' where data is not available)
%    column 1: year
%    column 2: smoking
%    column 3: vaping
data = YouthSmokingDataCopy;

% initial guesses for parameters
uC0 = 0.661; uCinf = 0.378; % initial and final utility of smoking cigarettes (w/o ecigs)
uCinf_wE = 0.1; % final utility of smoking cigarettes (w/ ecigs)
uE0 = 0.6; uEinf = 0.08; % initial and final utility of vaping e-cigs
lambda = 0.18; % how quickly new knowledge is created and absorbed
alpha = 0.963; % how quickly people respond to social changes (Lang et al. 2015)

vapingDataYears = data(~isnan(data(:,3)),1);        % find years with vaping data
vapingData = data(~isnan(data(:,3)),3);             % find corresponding vaping data
smokingData_postVaping = data(~isnan(data(:,3)),2); % find corresponding smoking data (w/ ecigs)
T0C = data(1,1);          % first year with smoking data
T0E = vapingDataYears(1); % first year with vaping data

% find data with smoking only (before vaping)
indicatorSmokingOnly = logical(~isnan(data(:,2)).*(data(:,1)<=T0E));
smokingDataYears = data(indicatorSmokingOnly,1); % find years with smoking data (before vaping)
smokingData = data(indicatorSmokingOnly,2); % find corresponding smoking data (w/o ecigs)

% find data pre-vaping for average residual ('AV')
indicatorSmokingOnlyAV = logical(~isnan(data(:,2)).*(data(:,1)<T0E));
smokingDataAV = data(indicatorSmokingOnlyAV,2); % find corresponding smoking data (w/o ecigs)
smokingDataYearsAV = data(indicatorSmokingOnlyAV,1); % find years with smoking data (before vaping)

tipC = 1995;              % tipping point of smoking info
% used for sensitivity analysis, affects maximum vaping prevalence and
% public health ratio (r)
tipE = 2025;              % tipping point of vaping info, variable perform sensitivity analysis
C0 = data(1,2);           % initial fraction of smokers in data
E0 = vapingData(1);       % initial fraction of vapers in data

% utility function for cigs and ecigs
ufunc =@(t,u0,uinf,tip,lambda) u0+(uinf-u0)./(1+exp(-lambda*(t-tip)));
% probability (per unit time) of transitioning to a new group
P =@(prev,util) prev^alpha*util; % send prevalence and utility of new group

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                     Fit Round 1 - just smoking data
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
x0 = [uC0,uCinf,lambda,C0]; % vector of fitting parameters (guesses)
% search for best fit parameters for smoking
x = fminsearch(@(x)computeError_AC(x,tipC,ufunc,P,smokingDataYears,smokingData),x0);
% extract best fit parameters
uChat = [x(1),x(2)]; % best fit cig utilities
lambdaHat = x(3);    % best fit lambda
IChat = x(4);      % best fit C0

t0 = T0C;            % beginning year of simulation (beginning of data)
tend = data(end,1)+41;  % end year of simulation (end of data, or longer), +11 to 2030
tvec = t0:0.1:tend;  % time vector for best fit model (pre-vaping)

% evaluate model (counterfactual)
[T_cf,Y_cf] = integrate_ACmodel(uChat,tipC,lambdaHat,ufunc,P,tvec,IChat);

PCS = Y_cf(T_cf>=T0E,1); % prevalence of smoking (counterfactual) post vape

% for average residual
tvec2 = smokingDataYearsAV; % solely end of data, no future prediction, iterations of 1 year
[T_cf2,Y_cf2] = integrate_ACmodel(uChat,tipC,lambdaHat,ufunc,P,tvec2,IChat);
smoking_prevape_av = Y_cf2(T_cf2<T0E,1); % smoking model pre vape

% for smokers diverted by vaping (counterfactual)
tend3 = data(end,1)+11; % to 2030
tvec3 = t0:1:tend3; % iterations of 1 year
[T_cf3,Y_cf3] = integrate_ACmodel(uChat,tipC,lambdaHat,ufunc,P,tvec3,IChat);
smoking_cfac = Y_cf3(T_cf3>=T0E,1); % smoking counterfactual post vape year
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                     Fit Round 2 - smoking and vaping
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
x0=[uCinf_wE,uE0,uEinf,E0]; % vector of fitting parameters (guesses)
t0 = T0C;            % beginning year of simulation (beginning of smoking data)
tend = T0E;          % end year of simulation (end of smoking data)
tvec = t0:tend;      % time vector (pre-vaping)
% evaluate model to get IC for input
[~,Y] = integrate_ACmodel(uChat,tipC,lambdaHat,ufunc,P,tvec,IChat);
C0_T0E = Y(end,1);   % get smoking prevalence at T0E (from model)

% search for best fit parameters for vaping
x = fminsearch(@(x)computeError_ACE(x,tipC,tipE,lambdaHat,ufunc,P,...
    C0_T0E,uChat(1),vapingDataYears,[smokingData_postVaping,vapingData]),x0);
% extract best fit parameters
uC_T0E_hat = [uChat(1), x(1)]; % new cig utility (after vaping) from best fit
uEhat = [x(2),x(3)];           % vaping utilities from best fit
IC_T0E_hat = [C0_T0E,x(4)];    % IC at introduction of vaping

t0 = T0E;            % beginning year of simulation (beginning of vaping data)
tend = data(end,1)+41;  % end year of simulation (end of data, or longer)
tvec = t0:0.1:tend;  % time vector for best fit model (post-vaping)

% evaluate model (real world)
[T_rw,Y_rw] = integrate_ACEmodel(uC_T0E_hat,uEhat,tipC,tipE,lambdaHat,ufunc,P,tvec,IC_T0E_hat);

PRS = Y_rw(T_rw>=T0E,1); % prevalence of smoking (real world) post vape
PV = Y_rw(T_rw>=T0E,2);  % prevalence of vaping (real world) post vape

% for average residual
tend2 = data(end,1); % solely end of data, no future prediction
tvec2 = t0:1:tend2; % iterations of 1 year
[T_rw2,Y_rw2] = integrate_ACEmodel(uC_T0E_hat,uEhat,tipC,tipE,lambdaHat,ufunc,P,tvec2,IC_T0E_hat);
smoking_postvape_av = Y_rw2(T_rw2>=T0E,1); % smoking model post vape
vaping_av = Y_rw2(T_rw2>=T0E,2); % vaping model

% for smokers diverted by vaping (real life)
tend3 = data(end,1)+11; % to 2030
tvec3 = t0:1:tend3; % iterations of 1 year
[T_rw3,Y_rw3] = integrate_ACEmodel(uC_T0E_hat,uEhat,tipC,tipE,lambdaHat,ufunc,P,tvec3,IC_T0E_hat);
smoking_rlac = Y_rw3(T_rw3>=T0E,1); % smoking real life post vape year
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                  Calculate public health cost/benefit
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% integral to calculate prevalence of smokers diverted by vaping
t1 = 2011; % year vaping introduced into society
t2 = 2030; % future year
timevec = t1:1:t2; % iterations of 1 year
trapvec = t2-t1;

% smoking real life post vape year
lenY1 = length(smoking_rlac(:,1));
traprw=trapz(timevec,smoking_rlac((lenY1-trapvec):lenY1,1));

% smoking counterfactual post vape year
lenY2 = length(smoking_cfac(:,1));
trapcf=trapz(timevec,smoking_cfac((lenY2-trapvec):lenY2,1));

% prevalence of smokers diverted by vaping per year
trap = (trapcf-traprw)/trapvec

% average residual
a1 = [smoking_prevape_av; smoking_postvape_av; vaping_av];
a2 = [smokingDataAV; smokingData_postVaping; vapingData];
a3 = abs(a1-a2);
err = mean(a3,'omitnan')

% Figure 7, maximum vaping prevalence
max_vape = max(Y_rw(:,2));

% Figure 4
figure
p1=plot(T_cf,Y_cf(:,1),'k');
hold on
p2=plot(T_rw,Y_rw(:,1),'b'); % add smoking model (real world) to plot
p3=plot(T_rw,Y_rw(:,2),'r'); % add vaping model (real world) to plot
xline(T0E,'--k','HandleVisibility',"off"); % vertical line for T0E
xlabel('Year')
ylabel('Prevalence')

shadedErrorBar(T_cf,Y_cf(:,1),err*ones(length(T_cf),1),'k');
shadedErrorBar(T_rw,Y_rw(:,1),err*ones(length(T_rw),1),'b');
shadedErrorBar(T_rw,Y_rw(:,2),err*ones(length(T_rw),1),'r');
set(gca,'Layer','top')
p4=plot(data(:,1),data(:,2),'ob'); % add smoking data
p5=plot(data(:,1),data(:,3),'or'); % add vaping data
ylim([0 0.60])
xlim([1991 2040])
legend([p1 p2 p3 p4 p5],{'model - counterfactual','model - smoking','model - vaping','smoking data','vaping data'},'Location',"best")

t = T_rw; % time vector from introduction of e-cigarettes to end time (arbitrary)
r=zeros(length(t),1); % zeroes matrix length of t
for i=2:length(t) % start with 2 to avoid dividing by zero
    pv=trapz(t(1:i),PV(1:i));   % area under vaping curve
    pcs=trapz(t(1:i),PCS(1:i)); % area under counterfactual smoking curve
    prs=trapz(t(1:i),PRS(1:i)); % area under real world smoking curve
    r(i)=pv/(pcs-prs);          % ratio of vaping to smoking difference
end
% Figure 5
     figure
     plot(t(10:end),r(10:end),'Color','b','LineWidth',4) % start one year in
     xlabel('Year','FontSize',16)
     ylabel('Public health ratio (r)','FontSize',16)
     title('Youth')
     set(gca,'FontSize',14)
     set(gca,'YTickLabel',[])
     ylim([0 10])

% Figure 7, public health ratio (r) in 2030
%desiredY = interp1(t(10:end),r(10:end),2030);

% sensitivity analysis, Figure 6
% figure
% plot(T_rw,Y_rw(:,2),'g','LineWidth',4)
% xlabel('Year','FontSize',14)
% ylabel('Prevalence','FontSize',14)
% set(gca,'YTickLabel',[],'XTickLabel',[])
% ylim([0 1])
% xlim([2010 2050])
% set(gca,'FontSize',16)
% title('Youth')
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This function computes the error between ACE model and data
%    - inputs: x is [uCinf,uE0,uEinf,E0]
%              u is matrix of group utilities
%              P is transition probability function handle
%              tvec is vector of time points
%              IC is vector of initial population fractions
%    - outputs: T is vector of time points
%               Y is matrix of ACE fractions over time
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function err = computeError_ACE(x,tipC,tipE,lambda,ufunc,P,C0,uC0,tvec,data)
pow = 1.2; % penalty on residual, lower number = closer to started generally (less discrimination)
% extract fitted values
uC = [uC0,x(1)];  % initial smoking utility found earlier
uE = [x(2),x(3)]; % fitted e-cig utilities
IC = [C0,x(4)];   % initial smoking value found earlier
% integrate model
[~,Y] = integrate_ACEmodel(uC,uE,tipC,tipE,lambda,ufunc,P,tvec,IC);
% find the average sum of squared error between model and data (C, E)
resid = [abs(Y(:,1)-data(:,1)); abs(Y(:,2)-data(:,2))]; % residuals
RSS = sum(resid.^pow,'omitnan'); % residual sum of squares
err = real(mean(RSS)); % average error
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This function computes the error between AC model and data 
%    - inputs: x is [uC0,uCinf,lambda,C0]
%              u is matrix of group utilities
%              P is transition probability function handle
%              tvec is vector of time points
%              IC is vector of initial population fractions
%    - outputs: T is vector of time points
%               Y is matrix of ACE fractions over time
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function err = computeError_AC(x,tip,ufunc,P,tvec,data)
pow = 1.2; % penalty on residual, lower number = closer to started generally (less discrimination)
% extract fitted values
uC = [x(1),x(2)];  % smoking utilities
lambda = x(3);     % lambda
IC = x(4);         % initial smoking prevalence
% integrate model
[~,Y] = integrate_ACmodel(uC,tip,lambda,ufunc,P,tvec,IC);
% find the average sum of squared error between model and data (C)
resid = abs(Y(:,1)-data(:)); % residuals
RSS = sum(resid.^pow,'omitnan'); % residual sum of squares
err = real(mean(RSS)); % average error
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This function numerically integrates the ACE model
%    - inputs: u is matrix of group utilities
%              P is transition probability function handle
%              tvec is vector of time points
%              IC is vector of initial population fractions
%    - outputs: T is vector of time points
%               Y is matrix of ACE fractions over time
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [T,Y] = integrate_ACEmodel(uC,uE,tipC,tipE,lambda,ufunc,P,tvec,IC)
% set integration tolerances
options = odeset('AbsTol',1e-6,'RelTol',1e-6,'MaxStep',0.1);
% integrate system
[T,Y] = ode15s(@(t,y)odeLHS_ACE(t,y,uC,uE,tipC,tipE,lambda,ufunc,P),tvec,IC,options);
% add abstainers to results
Y = [Y, 1-sum(Y,2)];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This function numerically integrates the AC model
%    - inputs: u is matrix of group utilities
%              P is transition probability function handle
%              tvec is vector of time points
%              IC is vector of initial population fractions
%    - outputs: T is vector of time points
%               Y is matrix of AC fractions over time
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [T,Y] = integrate_ACmodel(uC,tip,lambda,ufunc,P,tvec,IC)
% set integration tolerances
options = odeset('AbsTol',1e-6,'RelTol',1e-6,'MaxStep',0.1);
% integrate system
[T,Y] = ode15s(@(t,y)odeLHS_AC(t,y,uC,tip,lambda,ufunc,P),tvec,IC,options);
% add abstainers to results
Y = [Y, 1-Y];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This function is the right hand side of the dynamical system (smoking, 
% vaping and abstaining - after introduction of vaping)
%    - inputs: t is the current time
%              y is vector of current ACE fractions
%              u is matrix of group utilities
%              P is transition probability function handle
%    - outputs: dy is right hand side of ACE dynamical system
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function dy = odeLHS_ACE(t,y,uC,uE,tipC,tipE,lambda,ufunc,P)
C = y(1); E = y(2); A = 1-C-E; % fraction smokers (C), vapers (E), abstainers (A)
uC0 = uC(1); uCinf = uC(2);    % initial and final utility of smoking (after vaping)
uE0 = uE(1); uEinf = uE(2);    % initial and final utility of vaping
uCt = ufunc(t,uC0,uCinf,tipC,lambda); % utility of smoking at current time
uEt = ufunc(t,uE0,uEinf,tipE,lambda); % utility of vaping at current time
uAt = 1-uCt-uEt;               % utility of abstaining at current time
dC = E*P(C,uCt) + A*P(C,uCt) - C*(P(E,uEt) + P(A,uAt)); % change in smokers (dC/dt)
dE = A*P(E,uEt) + C*P(E,uEt) - E*(P(A,uAt) + P(C,uCt)); % change in vapers (dE/dt) 
dy = [dC;dE]; % send back results
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This function is the right hand side of the dynamical system (smoking and
% abstaining only - before introduction of vaping)
%    - inputs: t is the current time
%              y is vector of current AC fractions
%              u is matrix of group utilities
%              P is transition probability function handle
%    - outputs: dy is right hand side of AC dynamical system
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function dy = odeLHS_AC(t,y,uC,tip,lambda,ufunc,P)
C = y(1); A = 1-C;            % fraction smokers (C), abstainers (A)
uC0 = uC(1); uCinf = uC(2);   % initial and final utility of smoking before vaping
uCt = ufunc(t,uC0,uCinf,tip,lambda); % utility of smoking at current time
uAt = 1-uCt;                  % utility of abstaining at current time

dC = A*P(C,uCt) - C*P(A,uAt); % change in smokers (dC/dt)
dy = dC; % send back results
end